# A function for performing simple linear regression with a cached QR decomposition
# this should be lighter weight than lm().  Works with summary.qr.lm() to give
# vcov calculations etc.
qr.lm <- function(y, qx) {
  if(length(y)!=nrow(qx$qr)) {
    #probably don't match because of a prior
    if(length(y)!=(nrow(qx$qr)-ncol(qx$qr))) stop("number of covariate observations does not match number of docs")
    #if it passes this check its the prior. thus
    y <- c(y,rep(0, ncol(qx$qr)))
  }
  beta <- solve.qr(qx, y)
  residuals <- qr.resid(qx,y)
  fitted.values <- qr.fitted(qx,y)
  df.residual <- length(fitted.values) - qx$rank
  out <- list(coefficients=beta, residuals=residuals, 
              fitted.values=fitted.values, 
              df.residual=df.residual, rank=qx$rank, qr=qx)
  out 
}
#this function rewrites the summary.lm() function
# to calculate from our reduced regression
summary.qr.lm <- function (object) {
  z <- object
  p <- z$rank
  rdf <- z$df.residual
  
  Qr <- object$qr
  n <- nrow(Qr$qr)
  p1 <- 1L:p
  r <- z$residuals
  f <- z$fitted.values
  
  mss <- ifelse(attr(z$terms, "intercept"), sum((f - mean(f))^2), sum(f^2)) 
  rss <- sum(r^2)
  
  resvar <- rss/rdf
  R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
  se <- sqrt(diag(R) * resvar)
  est <- z$coefficients[Qr$pivot[p1]]
  sigma <- sqrt(resvar)
  list(est=est, vcov=(sigma^2 * R))
}

is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
  abs(x - round(x)) < tol
} 

posint <- function(x) {
  all(is.wholenumber(x)) & all(x>0)
}

#summary.estimateEffect <- function(object, topics=NULL, nsim=500, ...) {
#  if(is.null(topics)) topics <- object$topics
#  if(any(!(topics %in% object$topics))) {
#    stop("Some topics specified with the topics argument are not available in this estimateEffect object.")
#  }
#  tables <- vector(mode="list", length=length(topics))
#  for(i in 1:length(topics)) {
#    topic <- topics[i]
#    sims <- lapply(object$parameters[[which(object$topics==topic)]], function(x) rmvnorm(nsim, x$est, x$vcov))
#    sims <- do.call(rbind,sims)
#    est<- colMeans(sims)
#    se <- sqrt(apply(sims,2, stats::var))
#    tval <- est/se
#    rdf <- nrow(object$data) - length(est)
#    p <- 2 * stats::pt(abs(tval), rdf, lower.tail = FALSE)
#    
#    coefficients <- cbind(est, se, tval, p)
#    rownames(coefficients) <- attr(object$parameters[[1]][[1]]$est, "names") 
#    colnames(coefficients) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
#    tables[[i]] <- coefficients
#  }
#  out <- list(call=object$call, topics=topics, tables=tables)
#  class(out) <- "summary.estimateEffect"
#  return(out)
#}


####Estimate Effect Function ####
estimateEffect_cluster <- function(formula,
                           stmobj, metadata=NULL,
                           uncertainty=c("Global", "Local", "None"), documents=NULL,
                           nsims=25, prior=NULL) {
  origcall <- match.call()
  thetatype <- match.arg(uncertainty)
  if(thetatype=="None") nsims <- 1 #override nsims for no uncertainty
  
  if(!is.null(documents)) {
    # Convert the corpus to the internal STM format
    args <- asSTMCorpus(documents, data=metadata)
    documents <- args$documents
    metadata <- args$data
  }
  
  ##
  #Step 1: Extract the formula and do some error checking
  ##
  if(!inherits(formula,"formula")) stop("formula must be a formula object.")
  if(!is.null(metadata) & !is.data.frame(metadata)) metadata <- as.data.frame(metadata)
  termobj <- terms(formula, data=metadata)
  if(attr(termobj, "response")==1){
    #if a response is specified we have to parse it and remove it.
    # as.character of a formula turns
    # dv ~ iv
    # into:  c("~", "dv", "iv")
    response <- as.character(formula)[2] #second object is the response in this cases
    K <- eval(parse(text=response))
    if(!(posint(K) && max(K)<=stmobj$settings$dim$K)) stop("Topics specified as response in formula must be a set of positive integers equal to or less than the number of topics in the model.")   
    #now we reconstruct the formula removing the response
    formula <- formula(paste(as.character(formula)[c(1,3)], collapse = " "))
    #the above used to be the below code but the use got deprecated.
    #as.formula(as.character(formula)[c(1,3)])
    termobj <- terms(formula, data=metadata)
  } else {
    K <- 1:stmobj$settings$dim$K
  }
  mf <- model.frame(termobj, data=metadata)
  xmat <- model.matrix(termobj,data=metadata)
  varlist <- all.vars(termobj)
  if(!is.null(metadata)) {
    data <- metadata[, varlist, drop=FALSE]
  } else {
    templist <- list()
    for(i in 1:length(varlist)) {
      templist[[i]] <- get(varlist[i])
    }
    data <- data.frame(templist)
    names(data) <- varlist
    rm(templist)
  }
  metadata.full <- metadata
  metadata <- data
  rm(data)
  ##
  #Step 2: Compute the QR decomposition
  ##
  # all the models here are essentially just OLS regressions
  # becuase we will run them many times we want to cache the 
  # expensive components in advance.
  if(!is.null(prior)) {
    if(!is.matrix(prior)) {
      prior <- diag(prior, nrow=ncol(xmat))
    } 
    if(ncol(prior)!=ncol(xmat)) stop("number of columns in prior does not match columns in design matrix")
    prior.pseudo <- chol(prior)
    xmat <- rbind(xmat,prior.pseudo)
  }
  
  ##  
  #Step 3: Calculate Coefficients
  ##
  print(paste("Running bootstrapped estimateEffect function with nsims = ",nsims))
  progress_bar = txtProgressBar(min=0, max=nsims, style = 3, char="=")
  storage <- vector(mode="list", length=length(K))
  for(i in 1:nsims) {
    setTxtProgressBar(progress_bar, value = i)
    var_problem <- "yes"
    while(var_problem=="yes"){
      countries=as.character(sample(unique(metadata.full$Actor),size = length(unique(metadata.full$Actor)),replace = TRUE))
      
      boot.dat <- lapply(countries,FUN = function(x){
        boot.country <- xmat[which(metadata.full$Actor==x),]
        return(boot.country)
      })
      boot.dat <- do.call(rbind,boot.dat)
      var_problem <- ifelse(any(apply(boot.dat[,2:ncol(boot.dat)],2,var)==0),"yes","no")
    }
    
    qx <- qr(boot.dat)
    summary(qx$qr[,2])
    if(qx$rank < ncol(boot.dat)) {
      prior <- diag(1e-5, nrow=ncol(boot.dat))
      prior.pseudo <- chol(prior)
      boot.dat <- rbind(boot.dat,prior.pseudo)
      qx <- qr(boot.dat)
      warning("Covariate matrix is singular.  See the details of ?estimateEffect() for some common causes.
             Adding a small prior 1e-5 for numerical stability.")
    }
    
    # 3a) simulate theta
    if(thetatype=="None") thetasims <- stmobj$theta
    else {
      thetasims <- thetaPosterior(stmobj, nsims=1, type=thetatype, documents=documents)
      thetasims <- do.call(rbind, thetasims)
    }
    # 3b) calculate model
    for(k in K) {
      boot.thetasims <- lapply(countries,FUN = function(x){
        boot.country.theta <- thetasims[which(metadata.full$Actor==x),]
        return(boot.country.theta)
      })
      boot.thetasims <- do.call(rbind,boot.thetasims)
      lm.mod <- qr.lm(boot.thetasims[,k], qx)
      storage[[which(k==K)]][[i]] <- summary.qr.lm(lm.mod)      
    }
  }
  close(progress_bar)
  
  ##
  #Step 4: Return Values
  ##
  # 4a) Give it an S3 class
  # 4b) Return the terms object, the call, the formula, the data etc.
  # 4c) anything else we need for a summary() type function
  toreturn <- list(parameters=storage, topics=K,
                   call=origcall, uncertainty=thetatype, formula=formula, data=metadata,
                   modelframe=mf, varlist=varlist)
  class(toreturn) <- "estimateEffect"
  return(toreturn)
}

rmvnorm<-function(n,mu,Sigma,chol.Sigma=chol(Sigma)) {
  E<-matrix(rnorm(n*length(mu)),n,length(mu))
  t(  t(E%*%chol.Sigma) +c(mu))
}
summary.estimateEffect_comb <- function(object, topics=NULL, nsim=500, ...) {
  if(is.null(topics)) topics <- object$topics
  if(any(!(topics %in% object$topics))) {
    stop("Some topics specified with the topics argument are not available in this estimateEffect object.")
  }
  tables <- vector(mode="list", length=length(topics))
  tables_vcov <- vector(mode="list", length=length(topics))
  for(i in 1:length(topics)) {
    topic <- topics[i]
    sims <- lapply(object$parameters[[which(object$topics==topic)]], function(x) rmvnorm(nsim, x$est, x$vcov))
    sims <- do.call(rbind,sims)
    est<- colMeans(sims)
    se <- sqrt(apply(sims,2, stats::var))
    tval <- est/se
    rdf <- nrow(object$data) - length(est)
    p <- 2 * stats::pt(abs(tval), rdf, lower.tail = FALSE)
    
    coefficients <- cbind(est, se, tval, p)
    rownames(coefficients) <- attr(object$parameters[[1]][[1]]$est, "names") 
    colnames(coefficients) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
    tables[[i]] <- coefficients
    
    # Save Vcov Matrix
    vcov_save <- matrix(nrow = length(attr(object$parameters[[1]][[1]]$est, "names")),
                        ncol = length(attr(object$parameters[[1]][[1]]$est, "names")))
    rownames(vcov_save) <- attr(object$parameters[[1]][[1]]$est, "names") 
    colnames(vcov_save) <- attr(object$parameters[[1]][[1]]$est, "names") 
    vars <- apply(sims,2, stats::var)
    # Variance for each variable
    for(j in 1:nrow(vcov_save)){
      vcov_save[j,j] <- vars[j]
    }
    # Covariance
    print(paste("Running summary on the estimateEffect function for topic ",topic))
    progress_bar = txtProgressBar(min=0, max=nrow(vcov_save), style = 3, char="=")
    for(k in 1:nrow(vcov_save)){
      setTxtProgressBar(progress_bar, value = k)
      for(l in 1:ncol(vcov_save)){
        if(k!=l){
          vcov_save[k,l] <- cov(sims[,k],sims[,l])
        }
      }
    }
    close(progress_bar)
    tables_vcov[[i]] <- vcov_save
    
  }
  out <- list(call=object$call, topics=topics, tables=tables)
  out_vcov <- list(call=object$call, topics=topics, tables=tables_vcov)
  class(out) <- "summary.estimateEffect"
  class(out_vcov) <- "summary.estimateEffect.vcov"
  out_all <- list(out,out_vcov)
  return(out_all)
}

calc_interaction_regression <- function(var_1 = NULL,
                                        var_moderator = NULL,
                                        reg_model = NULL){
  vcov_mat <- vcov(reg_model)
  
  moderator_vars <- row.names(vcov_mat)[grepl(var_moderator,row.names(vcov_mat))]
  name.interact <- moderator_vars[grepl(":",moderator_vars)]
  
  tab_out <- data.frame()
  
  for(i in 1:length(name.interact)){
    level_extract <- name.interact[i]
    
    L = vcov_mat[row.names(vcov_mat)==var_1,
                 colnames(vcov_mat)==var_1]+ # Variance image_lag6m
      vcov_mat[row.names(vcov_mat)==level_extract,
               colnames(vcov_mat)==level_extract]+ # Variance moderator value
      2*vcov_mat[row.names(vcov_mat)==level_extract,
                 colnames(vcov_mat)==var_1] # 2*Covariance of moderator and image
    se_interacted <- sqrt(L)
    
    res_est_out <- tidy(reg_model) %>% 
      data.frame() %>% 
      filter(effect == "fixed")
    
    est_base <- res_est_out[res_est_out$term==var_1,"estimate"]
    se_base <- res_est_out[res_est_out$term==var_1,"std.error"]
    
    est_interacted <- res_est_out[res_est_out$term==var_1,"estimate"]+
      res_est_out[res_est_out$term==name.interact[i],"estimate"]
    
    tab_save <- data.frame(Estimate = c(est_base,
                                        est_interacted),
                           Std.Error = c(se_base,
                                         se_interacted))
    
    tab_save$tval <- tab_save$Estimate/tab_save$Std.Error
    rdf <- nobs(reg_model) - nrow(res_est_out)
    tab_save$p <- 2 * stats::pt(abs(tab_save$tval), rdf, lower.tail = FALSE)
    tab_save$moderator_level <- c("baseline",level_extract)
    
    tab_out <- bind_rows(tab_out,tab_save)
    
  }
  
  return(tab_out %>% filter(!duplicated(moderator_level)))
}


calc_interaction_regression_stm <- function(var_1 = NULL,
                                        var_mod = NULL,
                                        topic = NULL,
                                        estimate_object = NULL){
  mod_x <- summary.estimateEffect_comb(estimate_object,topics = topic)
  vcov_mat.s <- mod_x[[2]]
  vcov_mat <- vcov_mat.s[["tables"]] %>% data.frame()
  
  moderator_vars <- row.names(vcov_mat)[grepl(var_mod,row.names(vcov_mat))]
  row.interact <- moderator_vars[grepl(":",moderator_vars)]
  col.interact <- gsub(":",".",row.interact)
  col.interact <- gsub(" ",".",col.interact)
  col.interact <- gsub("-",".",col.interact)
  
  tab_out <- data.frame()
  
  for(i in 1:length(row.interact)){
    level_extract <- row.interact[i]
    
    L = vcov_mat[row.names(vcov_mat)==var_1,
                 names(vcov_mat)==var_1]+ # Variance image_lag6m
      vcov_mat[row.names(vcov_mat)==row.interact[i],
               names(vcov_mat)==col.interact[i]]+ # Variance moderator value
      2*vcov_mat[row.names(vcov_mat)==row.interact[i],
                 names(vcov_mat)==var_1] # 2*Covariance of moderator and image
    se_out <- sqrt(L)
    res_mod <- mod_x[[1]]
    res_est_out <- res_mod$tables %>% data.frame()
    est_out <- res_est_out[row.names(res_est_out)==var_1,"Estimate"]+
      res_est_out[row.names(res_est_out)==row.interact[i],"Estimate"]
    
    tab_save <- data.frame(Estimate = c(res_est_out[row.names(res_est_out)==var_1,"Estimate"],
                                        est_out),
                           Std.Error = c(res_est_out[row.names(res_est_out)==var_1,"Std..Error"],
                                         se_out))
    
    tab_save$tval <- tab_save$Estimate/tab_save$Std.Error
    rdf <- nrow(data.sum) - nrow(res_est_out)
    tab_save$p <- 2 * stats::pt(abs(tab_save$tval), rdf, lower.tail = FALSE)
    tab_save$Topic <- topic
    tab_save$moderator_level <- c("baseline",level_extract)
    
    tab_out <- bind_rows(tab_out,tab_save)
    
  }
  
  return(tab_out %>% filter(!duplicated(moderator_level)))
}

